load packages

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggridges)

load NOAA weather data

# Pull precipitation and min/max temperature for three NOAA stations over
# 2021-2022, attach a readable station label, and rescale the temperatures.
weather_df = 
  c("USW00094728", "USW00022534", "USS0023B17S") |>
  rnoaa::meteo_pull_monitors(
    var = c("PRCP", "TMIN", "TMAX"),
    date_min = "2021-01-01",
    date_max = "2022-12-31"
  ) |>
  mutate(
    # Translate each station id into a human-readable label.
    name = case_match(
      id,
      "USW00094728" ~ "CentralPark_NY",
      "USW00022534" ~ "Molokai_HI",
      "USS0023B17S" ~ "Waterhole_WA"
    ),
    # NOTE(review): presumably the raw values are tenths of a degree C -- confirm.
    tmin = tmin / 10,
    tmax = tmax / 10
  ) |>
  # Put the label and id first; all other columns keep their order.
  relocate(name, id)
## using cached file: /Users/miaisaacs/Library/Caches/org.R-project.R/R/rnoaa/noaa_ghcnd/USW00094728.dly
## date created (size, mb): 2024-09-26 09:04:42.987907 (8.651)
## file min/max dates: 1869-01-01 / 2024-09-30
## using cached file: /Users/miaisaacs/Library/Caches/org.R-project.R/R/rnoaa/noaa_ghcnd/USW00022534.dly
## date created (size, mb): 2024-09-26 09:05:03.789332 (3.932)
## file min/max dates: 1949-10-01 / 2024-09-30
## using cached file: /Users/miaisaacs/Library/Caches/org.R-project.R/R/rnoaa/noaa_ghcnd/USS0023B17S.dly
## date created (size, mb): 2024-09-26 09:05:11.000259 (1.036)
## file min/max dates: 1999-09-01 / 2024-09-30

first plot

# Basic scatterplot of tmax against tmin, handing the data frame straight
# to ggplot().
ggplot(data = weather_df, mapping = aes(x = tmin, y = tmax)) +
  geom_point()
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Scatterplot of tmax against tmin, with the data frame piped into ggplot().
weather_df |>
  ggplot(mapping = aes(x = tmin, y = tmax)) +
  geom_point()
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Store the scatterplot in an object instead of drawing it right away ...
ggp_weather_scatterplot = 
  ggplot(data = weather_df, mapping = aes(x = tmin, y = tmax)) +
  geom_point()

# ... then draw it by evaluating the object.
ggp_weather_scatterplot
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

# List the rows whose tmax is missing.
filter(weather_df, is.na(tmax))
## # A tibble: 17 × 6
##    name         id          date        prcp  tmax  tmin
##    <chr>        <chr>       <date>     <dbl> <dbl> <dbl>
##  1 Molokai_HI   USW00022534 2022-05-31    NA    NA    NA
##  2 Waterhole_WA USS0023B17S 2021-03-09    NA    NA    NA
##  3 Waterhole_WA USS0023B17S 2021-12-07    51    NA    NA
##  4 Waterhole_WA USS0023B17S 2021-12-31     0    NA    NA
##  5 Waterhole_WA USS0023B17S 2022-02-03     0    NA    NA
##  6 Waterhole_WA USS0023B17S 2022-08-09    NA    NA    NA
##  7 Waterhole_WA USS0023B17S 2022-08-10    NA    NA    NA
##  8 Waterhole_WA USS0023B17S 2022-08-11    NA    NA    NA
##  9 Waterhole_WA USS0023B17S 2022-08-12    NA    NA    NA
## 10 Waterhole_WA USS0023B17S 2022-08-13    NA    NA    NA
## 11 Waterhole_WA USS0023B17S 2022-08-14    NA    NA    NA
## 12 Waterhole_WA USS0023B17S 2022-08-15    NA    NA    NA
## 13 Waterhole_WA USS0023B17S 2022-08-16    NA    NA    NA
## 14 Waterhole_WA USS0023B17S 2022-08-17    NA    NA    NA
## 15 Waterhole_WA USS0023B17S 2022-08-18    NA    NA    NA
## 16 Waterhole_WA USS0023B17S 2022-08-19    NA    NA    NA
## 17 Waterhole_WA USS0023B17S 2022-12-31    76    NA    NA

fancier scatterplots

# The colour mapping lives on geom_point() only, so the points are coloured
# by station while geom_smooth() fits a single curve through all of them.
weather_df |>
  ggplot(aes(x = tmin, y = tmax)) +
  geom_point(aes(color = name), alpha = .5) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

use faceting

# Colour is mapped at the plot level, so both layers are split by station;
# each station also gets its own column panel.
weather_df |>
  ggplot(aes(x = tmin, y = tmax, color = name)) +
  geom_point(alpha = .5) +
  geom_smooth(se = FALSE) +
  facet_grid(cols = vars(name))
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

Let’s make a somewhat more interesting plot

# tmax over time, one panel per station; point size follows prcp and a
# smooth curve is drawn on top.
weather_df |>
  ggplot(aes(x = date, y = tmax, color = name)) +
  geom_point(aes(size = prcp), alpha = .3) +
  geom_smooth(se = FALSE) +
  facet_grid(cols = vars(name))
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_point()`).

learning assessment 1

# Central Park only: derive Fahrenheit versions of both temperatures, then
# plot one against the other with a straight-line fit on top.
weather_df |>
  filter(name == "CentralPark_NY") |>
  mutate(
    tmin_fahr = tmin * (9 / 5) + 32,
    tmax_fahr = tmax * (9 / 5) + 32
  ) |>
  ggplot(mapping = aes(x = tmin_fahr, y = tmax_fahr)) +
  geom_point(alpha = .5) +
  geom_smooth(se = FALSE, method = "lm")
## `geom_smooth()` using formula = 'y ~ x'

small things

# Smooth curves only (no point layer), one per station via the colour mapping.
weather_df |>
  ggplot(aes(x = date, y = tmax, color = name)) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_smooth()`).

# Hexagonal-bin version of the tmin/tmax scatterplot.
ggplot(weather_df, aes(x = tmin, y = tmax)) +
  geom_hex()
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_binhex()`).

# The colour is a fixed value set outside aes(), so every point is pink.
ggplot(weather_df, aes(x = tmin, y = tmax)) +
  geom_point(color = "pink")
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

univariate plots

# Histogram of tmin using the default binning.
ggplot(weather_df, aes(x = tmin)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Histogram of tmax filled by station, with the stations' bars placed side
# by side in each 2-unit bin.
weather_df |>
  ggplot(aes(x = tmax, fill = name)) +
  geom_histogram(binwidth = 2, position = "dodge")
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_bin()`).

How would I fix this? Maybe facet.

# One tmax histogram per station, each in its own column panel.
weather_df |>
  ggplot(aes(x = tmax, fill = name)) +
  geom_histogram() +
  facet_grid(cols = vars(name))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_bin()`).

maybe a density plot?

# Overlaid, semi-transparent tmax densities filled by station, with a blue
# outline and the bandwidth adjusted by a factor of .5.
weather_df |>
  ggplot(aes(x = tmax, fill = name)) +
  geom_density(color = "blue", adjust = .5, alpha = .4)
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_density()`).

make boxplot

# Boxplot of tmax for each station, filled by station.
weather_df |>
  ggplot(aes(x = name, y = tmax, fill = name)) +
  geom_boxplot()
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

make violin plot

# Violin plot of tmax per station, with each station's median marked in blue.
ggplot(weather_df, aes(x = name, y = tmax)) + 
  geom_violin(aes(fill = name), alpha = .5) + 
  # stat_summary() draws a pointrange by default, which also needs ymin/ymax.
  # Only `fun` is supplied here, so request a plain point rather than letting
  # the empty range segments be dropped with a warning. size = 2 is meant to
  # match the marker the default pointrange drew (size 0.5 x fatten 4).
  stat_summary(fun = "median", geom = "point", color = "blue", size = 2)
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_summary()`).

make ridge plot

# Ridge plot: one tmax density per station, stacked along the y axis.
weather_df |>
  ggplot(aes(x = tmax, y = name)) +
  geom_density_ridges(scale = .85)
## Picking joint bandwidth of 1.54
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).

learning assessment 2

# Overlaid, semi-transparent prcp densities, filled by station.
weather_df |>
  ggplot(aes(x = prcp)) +
  geom_density(aes(fill = name), alpha = .5)
## Warning: Removed 15 rows containing non-finite outside the scale range
## (`stat_density()`).

# Ridge plot of prcp, one ridge per station.
weather_df |>
  ggplot(aes(x = prcp, y = name)) +
  geom_density_ridges(scale = .85)
## Picking joint bandwidth of 9.22
## Warning: Removed 15 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).

# Boxplot of prcp for each station.
weather_df |>
  ggplot(aes(x = name, y = prcp)) +
  geom_boxplot()
## Warning: Removed 15 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Ridge plot of prcp restricted to rows where prcp is greater than zero.
# filter() keeps only rows where the condition is TRUE, so rows with a
# missing prcp are dropped here as well.
weather_df |>
  filter(prcp > 0) |>
  ggplot(mapping = aes(x = prcp, y = name)) +
  geom_density_ridges(scale = .85)
## Picking joint bandwidth of 20.6

saving and embedding plots

# Build the station-coloured tmin/tmax scatterplot and keep it as an object.
ggp_weather = 
  weather_df |>
  ggplot(aes(x = tmin, y = tmax)) +
  geom_point(aes(color = name), alpha = .5)

# Write the stored plot to a PDF (relative path) at 8 x 5.
ggsave(
  filename = "ggp_weather.pdf",
  plot = ggp_weather,
  width = 8,
  height = 5
)
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Set default knitr chunk options for figure width, aspect ratio and
# displayed width.
# NOTE(review): this call is the last thing in the visible document; chunk
# defaults normally apply to chunks evaluated after the call -- confirm
# whether it should sit in a setup chunk at the top instead.
knitr::opts_chunk$set(fig.width = 6, fig.asp = .6, out.width = "90%")